In [ ]:
%load_ext autoreload
%autoreload 2
In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from pathlib import Path
import zipfile
from skimage.color import lab2rgb
from dataset import CelebADataset
from training import train_model #, load_model
import json
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchsummary import summary

Simple Convolutional Autoencoder (CAE) for 224x224 Images¶

With downloaded dataset¶

Data Preparation¶

In [ ]:
import os
import torch
from PIL import Image
import torchvision.transforms as transforms
import numpy as np
from skimage.color import rgb2lab, lab2rgb

# Define the paths
image_folder = 'data/celebA/img_celeba'  # Replace with your images folder path

# Define a transform to convert images to grayscale and LAB
class RGBToGrayLAB:
    """Split an RGB image into normalized LAB components.

    Returns a tuple (l, ab) where l is the L channel scaled to [0, 1]
    and ab holds the a/b channels scaled to [0, 1] via (x + 128) / 255.
    """

    def __call__(self, img):
        lab_array = rgb2lab(np.array(img)).astype("float32")
        luminance = lab_array[..., 0] / 100.0   # L: [0, 100] -> [0, 1]
        chroma = (lab_array[..., 1:] + 128) / 255.0  # a/b: roughly [-128, 127] -> [0, 1]
        return luminance, chroma

    def __repr__(self):
        return f"{type(self).__name__}()"

resize_transform = transforms.Resize((224, 224))

# Function to load images and apply transformations
def load_and_transform_images(image_folder, resize_transform, rgb_to_graylab):
    """Load every ``.jpg`` in ``image_folder``, resize it, split it into
    L/ab channels, and stack the results into batch tensors.

    Parameters
    ----------
    image_folder : str
        Directory scanned (non-recursively) for ``.jpg`` files.
    resize_transform : callable
        PIL -> PIL resize transform applied before color conversion.
    rgb_to_graylab : callable
        Converter returning ``(l, ab)`` arrays of shape (H, W) and (H, W, 2).

    Returns
    -------
    tuple of torch.Tensor
        ``l_images`` of shape (N, 1, H, W) and ``ab_images`` of shape (N, 2, H, W).

    Raises
    ------
    ValueError
        If no ``.jpg`` files are found — torch.stack on an empty list would
        otherwise fail with an opaque error.
    """
    l_images = []
    ab_images = []
    # sorted() makes the sample order (and therefore the downstream
    # train/val/test split) deterministic; os.listdir order is
    # filesystem-dependent.
    for filename in sorted(os.listdir(image_folder)):
        if filename.endswith('.jpg'):
            img_path = os.path.join(image_folder, filename)
            img = Image.open(img_path).convert('RGB')
            img = resize_transform(img)
            l, ab = rgb_to_graylab(img)
            l_images.append(torch.tensor(l).unsqueeze(0))  # add channel dim -> (1, H, W)
            ab_images.append(torch.tensor(ab).permute(2, 0, 1))  # (H, W, 2) -> (2, H, W)
    if not l_images:
        raise ValueError(f"No .jpg images found in {image_folder!r}")
    return torch.stack(l_images), torch.stack(ab_images)

# Load and transform the images
l_images, ab_images = load_and_transform_images(image_folder, resize_transform, RGBToGrayLAB())

# Display some example images
# NOTE(review): this import belongs in the top import cell; left in place here.
import matplotlib.pyplot as plt

# 4x4 grid: each of the 8 samples gets a grayscale/RGB pair side by side.
fig, axs = plt.subplots(4, 4, figsize=(12, 12))

for i in range(8):
    # Original image in grayscale (L channel, scaled to [0, 1])
    axs[i//2, (i % 2) * 2].imshow(l_images[i].numpy().squeeze(), cmap='gray')
    axs[i//2, (i % 2) * 2].axis('off')
    axs[i//2, (i % 2) * 2].set_title(f'Grayscale {i}')

    # Rebuild the full LAB image from the normalized channels, then convert to RGB
    img_lab = np.zeros((224, 224, 3), dtype=np.float32)
    img_lab[:,:,0] = l_images[i].numpy().squeeze() * 100  # undo the L / 100 scaling
    img_lab[:,:,1:] = (ab_images[i].numpy().transpose(1, 2, 0) * 255) - 128  # undo the (ab + 128) / 255 scaling
    img_rgb = lab2rgb(img_lab)
    axs[i//2, (i % 2) * 2 + 1].imshow(img_rgb)
    axs[i//2, (i % 2) * 2 + 1].axis('off')
    axs[i//2, (i % 2) * 2 + 1].set_title(f'LAB to RGB {i}')

plt.tight_layout()
plt.show()
No description has been provided for this image

Data Loader¶

In [ ]:
print(l_images.shape)
print(ab_images.shape)
torch.Size([998, 1, 224, 224])
torch.Size([998, 2, 224, 224])
In [ ]:
batch_size = 32
# Prepare the Datasets
all_dataset = CelebADataset(l_images=l_images, ab_images=ab_images, reduced=False)

# Split ratios for the training, validation and test sets
train_ratio = 0.75
valid_ratio = 0.15
test_ratio = 0.1

# Compute the length of each subset
total_count = len(all_dataset)
train_count = int(total_count * train_ratio)
valid_count = int(total_count * valid_ratio)
test_count = total_count - train_count - valid_count  # ensures the three counts sum to the total

# Set the seed so the random split is reproducible
torch.manual_seed(42)

# Split the dataset
train_dataset, valid_dataset, test_dataset = random_split(all_dataset, [train_count, valid_count, test_count])

# Create a DataLoader for each subset
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
In [ ]:
# Print the sizes of the three splits
print("Tamaño del conjunto de entrenamiento:", len(train_dataset))
print("Tamaño del conjunto de validación:", len(valid_dataset))
print("Tamaño del conjunto de prueba:", len(test_dataset))
Tamaño del conjunto de entrenamiento: 748
Tamaño del conjunto de validación: 149
Tamaño del conjunto de prueba: 101
In [ ]:
print(all_dataset.ab_images.shape)
print(all_dataset.l_images.shape)
torch.Size([998, 2, 224, 224])
torch.Size([998, 1, 224, 224])
In [ ]:
import os
import torch
from PIL import Image
import torchvision.transforms as transforms
import numpy as np
from skimage.color import rgb2lab, lab2rgb

# Define the paths
image_folder = 'data/celebA/img_celeba'  # Replace with your images folder path

# Function to load and transform images
def load_and_transform_images(image_folder, transform):
    """Load every .jpg in ``image_folder``, apply ``transform``, and return
    (l_images, ab_images) as numpy arrays.

    NOTE(review): this redefines (shadows) the earlier
    ``load_and_transform_images`` in this notebook, and is NOT equivalent:
    it returns numpy arrays rather than stacked torch tensors, and the
    ab arrays keep their (N, H, W, 2) channel-last layout.
    """
    l_images = []
    ab_images = []
    for filename in os.listdir(image_folder):
        if filename.endswith('.jpg'):
            img_path = os.path.join(image_folder, filename)
            img = Image.open(img_path).convert('RGB')
            l, ab = transform(img)
            l_images.append(l)
            ab_images.append(ab)
    l_images = np.array(l_images)
    ab_images = np.array(ab_images)
    return l_images, ab_images

# Define the transformation
class RGBToGrayLAB:
    """Split an RGB image into L and ab LAB components.

    NOTE(review): this shadows the earlier ``RGBToGrayLAB`` in this notebook
    and is NOT equivalent — here L is left in its native [0, 100] range
    instead of being scaled to [0, 1].
    """
    def __call__(self, img):
        img = np.array(img)
        lab = rgb2lab(img).astype("float32")
        l = lab[..., 0]  # L channel, unscaled: [0, 100]
        ab = (lab[..., 1:] + 128) / 255.0  # a/b channels scaled to [0, 1]
        return l, ab

# Resize then convert to L/ab channels in a single composed transform
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    RGBToGrayLAB()
])

l_images, ab_images = load_and_transform_images(image_folder, transform)

# Build the dataset
dataset = CelebADataset(l_images, ab_images)

# Split the dataset into training, validation and test sets (80/10/10)
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Create a DataLoader for each subset
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
In [ ]:
# Define the CAE architecture to colorize images (greyscale to LAB)

class CAE(nn.Module):
    """Convolutional autoencoder mapping a 1-channel L image to the two
    ab chroma channels.

    Encoder: 1xHxW -> 8x(H/4)x(W/4) via two conv+ReLU+maxpool stages.
    Decoder: 8x(H/4)x(W/4) -> 2xHxW via two stride-2 transposed convolutions,
    ending in a Sigmoid so the ab output lies in [0, 1].
    The layer ordering matches the saved checkpoints' state_dict keys
    (encoder.0 ... encoder.5, decoder.0 ... decoder.3).
    """

    def __init__(self):
        super(CAE, self).__init__()

        # Two downsampling stages: conv (channels 1->16, 16->8), ReLU, 2x2 maxpool.
        encoder_layers = [
            nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 8, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Two upsampling stages: stride-2 transposed convs (8->16, 16->2),
        # each doubling the spatial size; Sigmoid keeps outputs in [0, 1].
        decoder_layers = [
            nn.ConvTranspose2d(8, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 2, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode the grayscale input and decode it into ab channels."""
        latent = self.encoder(x)
        return self.decoder(latent)
In [ ]:
architecture = CAE()
summary(architecture, (1, 224, 224), batch_size=batch_size)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [32, 16, 224, 224]             160
              ReLU-2         [32, 16, 224, 224]               0
         MaxPool2d-3         [32, 16, 112, 112]               0
            Conv2d-4          [32, 8, 112, 112]           1,160
              ReLU-5          [32, 8, 112, 112]               0
         MaxPool2d-6            [32, 8, 56, 56]               0
   ConvTranspose2d-7         [32, 16, 112, 112]           1,168
              ReLU-8         [32, 16, 112, 112]               0
   ConvTranspose2d-9          [32, 2, 224, 224]             290
          Sigmoid-10          [32, 2, 224, 224]               0
================================================================
Total params: 2,778
Trainable params: 2,778
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 6.12
Forward/backward pass size (MB): 643.12
Params size (MB): 0.01
Estimated Total Size (MB): 649.26
----------------------------------------------------------------
[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.
In [ ]:
# Sanity check of the batch shapes
dataiter = iter(train_loader)
l_images, ab_images = next(dataiter)
print(l_images.shape)  # expected: [batch_size, 1, 224, 224]
print(ab_images.shape)  # expected: [batch_size, 2, 224, 224]

# Model training
architecture = CAE()
file_name = "CelebA_cae_1.0"
train_model(train_loader, architecture, file_name, epochs=50)
torch.Size([32, 1, 224, 224])
torch.Size([32, 2, 224, 224])
Number of parameters: 2778
Epoch 1: 100%|██████████| 25/25 [00:20<00:00,  1.20it/s, loss=0.2268]
Epoch 2: 100%|██████████| 25/25 [00:17<00:00,  1.46it/s, loss=0.0662]
Epoch 3: 100%|██████████| 25/25 [00:35<00:00,  1.43s/it, loss=0.0068]
Epoch 4: 100%|██████████| 25/25 [00:31<00:00,  1.25s/it, loss=0.0032]
Epoch 5: 100%|██████████| 25/25 [00:34<00:00,  1.39s/it, loss=0.0017]
Epoch 6: 100%|██████████| 25/25 [00:12<00:00,  1.97it/s, loss=0.0009]
Epoch 7: 100%|██████████| 25/25 [00:25<00:00,  1.03s/it, loss=0.0006]
Epoch 8: 100%|██████████| 25/25 [00:13<00:00,  1.84it/s, loss=0.0006]
Epoch 9: 100%|██████████| 25/25 [00:13<00:00,  1.83it/s, loss=0.0005]
Epoch 10: 100%|██████████| 25/25 [00:21<00:00,  1.17it/s, loss=0.0004]
Epoch 11: 100%|██████████| 25/25 [00:40<00:00,  1.60s/it, loss=0.0002]
Epoch 12:  84%|████████▍ | 21/25 [00:37<00:07,  1.78s/it, loss=0.0002]
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[46], line 10
      8 architecture = CAE()
      9 file_name = "CelebA_cae_1.0"
---> 10 train_model(train_loader, architecture, file_name, epochs=50)

File ~/Downloads/Actuales/ML/Final-Project-ML/training.py:52, in train_model(train_data_loader, architecture, file_name, epochs, learning_rate, model, device)
     50 outputs = architecture(gray)
     51 loss = criterion(outputs, color)
---> 52 loss.backward()
     53 optimizer.step()
     55 epoch_running_loss += loss.item()

File ~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/torch/_tensor.py:488, in Tensor.backward(self, gradient, retain_graph, create_graph, inputs)
    478 if has_torch_function_unary(self):
    479     return handle_torch_function(
    480         Tensor.backward,
    481         (self,),
   (...)
    486         inputs=inputs,
    487     )
--> 488 torch.autograd.backward(
    489     self, gradient, retain_graph, create_graph, inputs=inputs
    490 )

File ~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/torch/autograd/__init__.py:197, in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    192     retain_graph = create_graph
    194 # The reason we repeat same the comment below is that
    195 # some Python versions print out the first line of a multi-line function
    196 # calls in the traceback and some print out the last line
--> 197 Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    198     tensors, grad_tensors_, retain_graph, create_graph, inputs,
    199     allow_unreachable=True, accumulate_grad=True)

KeyboardInterrupt: 
In [ ]:
def tensor_to_numpy(t):
    """Detach `t` from the autograd graph and return it as a CPU numpy array.

    NOTE(review): this helper was called below but never defined anywhere in
    the notebook, so this cell raised a NameError.
    """
    return t.detach().cpu().numpy()

# Load the trained model
model = CAE()
checkpoint = torch.load('trained_models/CelebA_cae_1.0_0.001_9.pt', map_location=torch.device('cpu'))
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Model evaluation on one test batch
dataiter = iter(test_loader)
images, _ = next(dataiter)
images = images.to(device)
with torch.no_grad():  # inference only — no need to build the autograd graph
    outputs = model(images)

# Visualize results: grayscale input next to the colorized prediction
fig, axs = plt.subplots(4, 4, figsize=(12, 12))
for i in range(8):
    # Grayscale input image (L channel scaled to [0, 1])
    gray_image = tensor_to_numpy(images[i]).squeeze()
    axs[i//2, (i % 2) * 2].imshow(gray_image, cmap='gray')
    axs[i//2, (i % 2) * 2].axis('off')

    # Colorized image (prediction): rebuild LAB then convert to RGB
    ab_image = tensor_to_numpy(outputs[i])
    img_lab = np.zeros((224, 224, 3), dtype=np.float32)
    img_lab[:,:,0] = gray_image * 100  # undo the L / 100 scaling
    # Exact inverse of the (ab + 128) / 255 encoding; the previous
    # (ab - 0.5) * 255 was off by 0.5 from the true [-128, 127] range.
    img_lab[:,:,1:] = (ab_image.transpose(1, 2, 0) * 255) - 128  # a and b channels
    img_rgb = lab2rgb(img_lab)
    axs[i//2, (i % 2) * 2 + 1].imshow(img_rgb)
    axs[i//2, (i % 2) * 2 + 1].axis('off')
plt.tight_layout()
plt.show()
No description has been provided for this image

Method 2¶

In [ ]:
import os
from pathlib import Path

# Import glob to get the files directories recursively
import glob

# Import Garbage collector interface
import gc 

# Import OpenCV to transforme pictures
import cv2

# Import Time
import time

# import numpy for math calculations
import numpy as np

# Import pandas for data (csv) manipulation
import pandas as pd

# Import matplotlib for plotting
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('fivethirtyeight') 
%matplotlib inline

import PIL
from PIL import Image
from skimage.color import rgb2lab, lab2rgb

import pytorch_lightning as pl

# Import pytorch to build Deel Learling Models 
import torch
from torch import nn, optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
from torchvision import models
from torch.nn import functional as F
import torch.utils.data
from torchvision.models.inception import inception_v3
from scipy.stats import entropy

from torchsummary import summary

# Import tqdm to show a smart progress meter
from tqdm import tqdm
In [ ]:
def lab_to_rgb(L, ab):
    """
    Takes an image or a batch of images and converts from LAB space to RGB.

    Parameters
    ----------
    L : torch.Tensor of shape (N, 1, H, W)
        L channel scaled to [0, 1] (the dataset divides L by 100).
    ab : torch.Tensor of shape (N, 2, H, W)
        a/b channels scaled to [0, 1] via (x + 128) / 255.

    Returns
    -------
    numpy.ndarray of shape (N, H, W, 3)
        RGB images as produced by skimage's lab2rgb.
    """
    L = L * 100  # [0, 1] -> [0, 100]
    # Exact inverse of the (ab + 128) / 255 dataset encoding. The previous
    # (ab - 0.5) * 255 mapped [0, 1] to [-127.5, 127.5] — off by 0.5 from
    # the true [-128, 127] range.
    ab = ab * 255 - 128
    Lab = torch.cat([L, ab], dim=1).detach().cpu().numpy()  # (N, 3, H, W)
    rgb_imgs = []
    for img in Lab:
        img_rgb = lab2rgb(img.transpose(1, 2, 0))  # lab2rgb expects channel-last
        rgb_imgs.append(img_rgb)
    return np.stack(rgb_imgs, axis=0)
In [ ]:
# Load the trained model
model = CAE()
checkpoint = torch.load('trained_models/CelebA_cae_1.0_0.001_9.pt', map_location=torch.device('cpu'))
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Model evaluation on one test batch
dataiter = iter(test_loader)
l_images, ab_images = next(dataiter)
l_images, ab_images = l_images.to(device), ab_images.to(device)
output_ab = model(l_images)

# Convert the original images and the predictions to RGB for display
original_rgb = lab_to_rgb(l_images, ab_images)
predicted_rgb = lab_to_rgb(l_images, output_ab)

# Visualize results: original next to predicted colorization
fig, axs = plt.subplots(4, 4, figsize=(12, 12))

for i in range(8):
    # Original color image
    axs[i//2, (i % 2) * 2].imshow(original_rgb[i])
    axs[i//2, (i % 2) * 2].axis('off')
    axs[i//2, (i % 2) * 2].set_title(f'Original {i}', fontsize=11)

    # Colorized image (prediction)
    axs[i//2, (i % 2) * 2 + 1].imshow(predicted_rgb[i])
    axs[i//2, (i % 2) * 2 + 1].axis('off')
    axs[i//2, (i % 2) * 2 + 1].set_title(f'Predicción {i}', fontsize=11)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# NOTE(review): this cell is an exact duplicate of the previous evaluation
# cell — consider deleting one of them.
# Load the trained model
model = CAE()
checkpoint = torch.load('trained_models/CelebA_cae_1.0_0.001_9.pt', map_location=torch.device('cpu'))
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Model evaluation on one test batch
dataiter = iter(test_loader)
l_images, ab_images = next(dataiter)
l_images, ab_images = l_images.to(device), ab_images.to(device)
output_ab = model(l_images)

# Convert the original images and the predictions to RGB for display
original_rgb = lab_to_rgb(l_images, ab_images)
predicted_rgb = lab_to_rgb(l_images, output_ab)

# Visualize results: original next to predicted colorization
fig, axs = plt.subplots(4, 4, figsize=(12, 12))

for i in range(8):
    # Original color image
    axs[i//2, (i % 2) * 2].imshow(original_rgb[i])
    axs[i//2, (i % 2) * 2].axis('off')
    axs[i//2, (i % 2) * 2].set_title(f'Original {i}', fontsize=11)

    # Colorized image (prediction)
    axs[i//2, (i % 2) * 2 + 1].imshow(predicted_rgb[i])
    axs[i//2, (i % 2) * 2 + 1].axis('off')
    axs[i//2, (i % 2) * 2 + 1].set_title(f'Predicción {i}', fontsize=11)

plt.tight_layout()
plt.show()
No description has been provided for this image